<?php
/**
 * Zend Framework
 *
 * LICENSE
 *
 * This source file is subject to the new BSD license that is bundled
 * with this package in the file LICENSE.txt.
 * It is also available through the world-wide-web at this URL:
 * http://framework.zend.com/license/new-bsd
 * If you did not receive a copy of the license and are unable to
 * obtain it through the world-wide-web, please send an email
 * to license@zend.com so we can send you a copy immediately.
 *
 * @category   Zend
 * @package    Zend_Pdf
 * @subpackage FileParser
 * @copyright  Copyright (c) 2005-2009 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 * @version    $Id: FileParser.php 16541 2009-07-07 06:59:03Z bkarwin $
 */

/**
 * Abstract utility class for parsing binary files.
 *
 * Provides a library of methods to quickly navigate and extract various data
 * types (signed and unsigned integers, floating- and fixed-point numbers,
 * strings, etc.) from the file.
 *
 * File access is managed via a {@link Zend_Pdf_FileParserDataSource} object.
 * This allows the same parser code to work with many different data sources:
 * in-memory objects, filesystem files, etc.
 *
 * @package    Zend_Pdf
 * @subpackage FileParser
 * @copyright  Copyright (c) 2005-2009 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */
abstract class Zend_Pdf_FileParser
{
	/**** Class Constants ****/

	/**
	 * Little-endian byte order (0x04 0x03 0x02 0x01).
	 */
	const BYTE_ORDER_LITTLE_ENDIAN = 0;

	/**
	 * Big-endian byte order (0x01 0x02 0x03 0x04).
	 */
	const BYTE_ORDER_BIG_ENDIAN    = 1;



	/**** Instance Variables ****/


	/**
	 * Flag indicating that the file has passed a cursory validation check.
	 * @var boolean
	 */
	protected $_isScreened = false;

	/**
	 * Flag indicating that the file has been successfully parsed.
	 * @var boolean
	 */
	protected $_isParsed = false;

	/**
	 * Object representing the data source to be parsed.
	 * @var Zend_Pdf_FileParserDataSource
	 */
	protected $_dataSource = null;



	/**** Public Interface ****/


	/* Abstract Methods */

	/**
	 * Performs a cursory check to verify that the binary file is in the expected
	 * format. Intended to quickly weed out obviously bogus files.
	 *
	 * Must set $this->_isScreened to true if successful.
	 *
	 * @throws Zend_Pdf_Exception
	 */
	abstract public function screen();

	/**
	 * Reads and parses the complete binary file.
	 *
	 * Must set $this->_isParsed to true if successful.
	 *
	 * @throws Zend_Pdf_Exception
	 */
	abstract public function parse();


	/* Object Lifecycle */

	/**
	 * Object constructor.
	 *
	 * Verifies that the data source has been properly initialized: a data
	 * source reporting a size of zero is rejected.
	 *
	 * @param Zend_Pdf_FileParserDataSource $dataSource
	 * @throws Zend_Pdf_Exception
	 */
	public function __construct(Zend_Pdf_FileParserDataSource $dataSource)
	{
		if ($dataSource->getSize() == 0) {
			require_once 'Zend/Pdf/Exception.php';
			throw new Zend_Pdf_Exception('The data source has not been properly initialized',
			Zend_Pdf_Exception::BAD_DATA_SOURCE);
		}
		$this->_dataSource = $dataSource;
	}

	/**
	 * Object destructor.
	 *
	 * Discards the data source object.
	 */
	public function __destruct()
	{
		$this->_dataSource = null;
	}


	/* Accessors */

	/**
	 * Returns true if the file has passed a cursory validation check.
	 *
	 * @return boolean
	 */
	public function isScreened()
	{
		return $this->_isScreened;
	}

	/**
	 * Returns true if the file has been successfully parsed.
	 *
	 * @return boolean
	 */
	public function isParsed()
	{
		return $this->_isParsed;
	}

	/**
	 * Returns the data source object representing the file being parsed.
	 *
	 * @return Zend_Pdf_FileParserDataSource
	 */
	public function getDataSource()
	{
		return $this->_dataSource;
	}


	/* Primitive Methods */

	/**
	 * Convenience wrapper for the data source object's moveToOffset() method.
	 *
	 * @param integer $offset Destination byte offset.
	 * @throws Zend_Pdf_Exception
	 */
	public function moveToOffset($offset)
	{
		$this->_dataSource->moveToOffset($offset);
	}

	/**
	 * Convenience wrapper for the data source object's getOffset() method.
	 *
	 * @return mixed Whatever the data source reports as its current offset.
	 */
	public function getOffset()
	{
		return $this->_dataSource->getOffset();
	}

	/**
	 * Convenience wrapper for the data source object's getSize() method.
	 *
	 * @return mixed Whatever the data source reports as its size.
	 */
	public function getSize()
	{
		return $this->_dataSource->getSize();
	}

	/**
	 * Convenience wrapper for the data source object's readBytes() method.
	 *
	 * @param integer $byteCount Number of bytes to read.
	 * @return string
	 * @throws Zend_Pdf_Exception
	 */
	public function readBytes($byteCount)
	{
		return $this->_dataSource->readBytes($byteCount);
	}

	/**
	 * Convenience wrapper for the data source object's skipBytes() method.
	 *
	 * @param integer $byteCount Number of bytes to skip.
	 * @throws Zend_Pdf_Exception
	 */
	public function skipBytes($byteCount)
	{
		$this->_dataSource->skipBytes($byteCount);
	}


	/* Parser Methods */

	/**
	 * Reads the signed integer value from the binary file at the current byte
	 * offset.
	 *
	 * Advances the offset by the number of bytes read. Throws an exception if
	 * an error occurs. Both arguments are validated before any data is read,
	 * so an invalid size or byte order does not move the offset.
	 *
	 * @param integer $size Size of integer in bytes: 1-4
	 * @param integer $byteOrder (optional) Big- or little-endian byte order.
	 *   Use the BYTE_ORDER_ constants defined in {@link Zend_Pdf_FileParser}.
	 *   If omitted, uses big-endian.
	 * @return integer
	 * @throws Zend_Pdf_Exception
	 */
	public function readInt($size, $byteOrder = self::BYTE_ORDER_BIG_ENDIAN)
	{
		if (($size < 1) || ($size > 4)) {
			require_once 'Zend/Pdf/Exception.php';
			throw new Zend_Pdf_Exception("Invalid signed integer size: $size",
			Zend_Pdf_Exception::INVALID_INTEGER_SIZE);
		}
		$this->_assertValidByteOrder($byteOrder);
		/* unpack() will not work for this method because it always works in
		 * the host byte order for signed integers. It also does not allow for
		 * variable integer sizes.
		 */
		$bytes = $this->_readBytesMsbFirst($size, $byteOrder);
		$number = ord($bytes[0]);
		if (($number & 0x80) == 0x80) {
			/* The sign bit is set, so this number is negative. Build the
			 * one's complement (every byte inverted) as a positive value.
			 */
			$number = (~ $number) & 0xff;
			for ($i = 1; $i < $size; $i++) {
				$number = ($number << 8) | ((~ ord($bytes[$i])) & 0xff);
			}
			/* Now turn this back into a negative number by inverting all
			 * of the bits again (we didn't add one above so won't
			 * subtract it below). This works reliably on both 32- and
			 * 64-bit systems.
			 */
			$number = ~$number;
		} else {
			for ($i = 1; $i < $size; $i++) {
				$number = ($number << 8) | ord($bytes[$i]);
			}
		}
		return $number;
	}

	/**
	 * Reads the unsigned integer value from the binary file at the current byte
	 * offset.
	 *
	 * Advances the offset by the number of bytes read. Throws an exception if
	 * an error occurs. Both arguments are validated before any data is read,
	 * so an invalid size or byte order does not move the offset.
	 *
	 * NOTE: If you ask for a 4-byte unsigned integer on a 32-bit machine, the
	 * resulting value WILL BE SIGNED because PHP uses signed integers internally
	 * for everything. To guarantee portability, be sure to use bitwise
	 * operators on large unsigned integers!
	 *
	 * @param integer $size Size of integer in bytes: 1-4
	 * @param integer $byteOrder (optional) Big- or little-endian byte order.
	 *   Use the BYTE_ORDER_ constants defined in {@link Zend_Pdf_FileParser}.
	 *   If omitted, uses big-endian.
	 * @return integer
	 * @throws Zend_Pdf_Exception
	 */
	public function readUInt($size, $byteOrder = self::BYTE_ORDER_BIG_ENDIAN)
	{
		if (($size < 1) || ($size > 4)) {
			require_once 'Zend/Pdf/Exception.php';
			throw new Zend_Pdf_Exception("Invalid unsigned integer size: $size",
			Zend_Pdf_Exception::INVALID_INTEGER_SIZE);
		}
		$this->_assertValidByteOrder($byteOrder);
		/* unpack() is a bit heavyweight for this simple conversion. Just
		 * work the bytes directly.
		 */
		$bytes = $this->_readBytesMsbFirst($size, $byteOrder);
		$number = ord($bytes[0]);
		for ($i = 1; $i < $size; $i++) {
			$number = ($number << 8) | ord($bytes[$i]);
		}
		return $number;
	}

	/**
	 * Returns true if the specified bit is set in the integer bitfield.
	 *
	 * @param integer $bit Bit number to test (i.e. - 0-31)
	 * @param integer $bitField
	 * @return boolean
	 */
	public function isBitSet($bit, $bitField)
	{
		$bitMask = 1 << $bit;
		return (($bitField & $bitMask) == $bitMask);
	}

	/**
	 * Reads the signed fixed-point number from the binary file at the current
	 * byte offset.
	 *
	 * Common fixed-point sizes are 2.14 and 16.16.
	 *
	 * The value is read as a signed integer of ($mantissaBits + $fractionBits)
	 * bits and then divided by 2^$fractionBits. The total must be a whole
	 * number of bytes; the resulting byte count is further validated by
	 * {@link readInt()}.
	 *
	 * Advances the offset by the number of bytes read. Throws an exception if
	 * an error occurs.
	 *
	 * @param integer $mantissaBits Number of bits in the mantissa (the part
	 *   to the left of the binary point)
	 * @param integer $fractionBits Number of bits in the fraction
	 * @param integer $byteOrder (optional) Big- or little-endian byte order.
	 *   Use the BYTE_ORDER_ constants defined in {@link Zend_Pdf_FileParser}.
	 *   If omitted, uses big-endian.
	 * @return float
	 * @throws Zend_Pdf_Exception
	 */
	public function readFixed($mantissaBits, $fractionBits,
	$byteOrder = self::BYTE_ORDER_BIG_ENDIAN)
	{
		$bitsToRead = $mantissaBits + $fractionBits;
		if (($bitsToRead % 8) !== 0) {
			require_once 'Zend/Pdf/Exception.php';
			throw new Zend_Pdf_Exception('Fixed-point numbers are whole bytes',
			Zend_Pdf_Exception::BAD_FIXED_POINT_SIZE);
		}
		return $this->readInt(($bitsToRead >> 3), $byteOrder) / (1 << $fractionBits);
	}

	/**
	 * Reads the Unicode UTF-16-encoded string from the binary file at the
	 * current byte offset.
	 *
	 * The byte order of the UTF-16 string must be specified. You must also
	 * supply the desired resulting character set.
	 *
	 * Advances the offset by the number of bytes read. Throws an exception if
	 * the byte order is invalid; the byte order is checked before any data is
	 * read, so the offset does not move in that case. A $byteCount of zero
	 * returns an empty string without reading or validating anything.
	 *
	 * The raw bytes are returned untouched when the requested character set
	 * already matches the source encoding; otherwise they are converted with
	 * {@link iconv()}, which signals a failed conversion by returning false.
	 *
	 * @todo Consider changing $byteCount to a character count. They are not
	 *   always equivalent (in the case of surrogates).
	 * @todo Make $byteOrder optional if there is a byte-order mark (BOM) in the
	 *   string being extracted.
	 *
	 * @param integer $byteCount Number of bytes (characters * 2) to return.
	 * @param integer $byteOrder (optional) Big- or little-endian byte order.
	 *   Use the BYTE_ORDER_ constants defined in {@link Zend_Pdf_FileParser}.
	 *   If omitted, uses big-endian.
	 * @param string $characterSet (optional) Desired resulting character set.
	 *   You may use any character set supported by {@link iconv()}. If omitted,
	 *   uses 'current locale'.
	 * @return string|false Converted string, or false if iconv() fails.
	 * @throws Zend_Pdf_Exception
	 */
	public function readStringUTF16($byteCount,
	$byteOrder = self::BYTE_ORDER_BIG_ENDIAN,
	$characterSet = '')
	{
		if ($byteCount == 0) {
			return '';
		}
		$this->_assertValidByteOrder($byteOrder);
		$bytes = $this->_dataSource->readBytes($byteCount);
		if ($byteOrder == self::BYTE_ORDER_BIG_ENDIAN) {
			$sourceCharacterSet = 'UTF-16BE';
		} else {
			$sourceCharacterSet = 'UTF-16LE';
		}
		if ($characterSet == $sourceCharacterSet) {
			return $bytes;
		}
		return iconv($sourceCharacterSet, $characterSet, $bytes);
	}

	/**
	 * Reads the Mac Roman-encoded string from the binary file at the current
	 * byte offset.
	 *
	 * You must supply the desired resulting character set.
	 *
	 * Advances the offset by the number of bytes read. The raw bytes are
	 * returned untouched when 'MacRoman' is requested; otherwise they are
	 * converted with {@link iconv()}, which signals a failed conversion by
	 * returning false.
	 *
	 * @param integer $byteCount Number of bytes (characters) to return.
	 * @param string $characterSet (optional) Desired resulting character set.
	 *   You may use any character set supported by {@link iconv()}. If omitted,
	 *   uses 'current locale'.
	 * @return string|false Converted string, or false if iconv() fails.
	 * @throws Zend_Pdf_Exception
	 */
	public function readStringMacRoman($byteCount, $characterSet = '')
	{
		if ($byteCount == 0) {
			return '';
		}
		$bytes = $this->_dataSource->readBytes($byteCount);
		if ($characterSet == 'MacRoman') {
			return $bytes;
		}
		return iconv('MacRoman', $characterSet, $bytes);
	}

	/**
	 * Reads the Pascal string from the binary file at the current byte offset.
	 *
	 * The length of the Pascal string is determined by reading the length bytes
	 * which precede the character data (as a big-endian unsigned integer). You
	 * must supply the desired resulting character set.
	 *
	 * Advances the offset by the number of bytes read. The character data is
	 * treated as ASCII: it is returned untouched when 'ASCII' is requested and
	 * otherwise converted with {@link iconv()}, which signals a failed
	 * conversion by returning false.
	 *
	 * @param string $characterSet (optional) Desired resulting character set.
	 *   You may use any character set supported by {@link iconv()}. If omitted,
	 *   uses 'current locale'.
	 * @param integer $lengthBytes (optional) Number of bytes that make up the
	 *   length. Default is 1.
	 * @return string|false Converted string, or false if iconv() fails.
	 * @throws Zend_Pdf_Exception
	 */
	public function readStringPascal($characterSet = '', $lengthBytes = 1)
	{
		$byteCount = $this->readUInt($lengthBytes);
		if ($byteCount == 0) {
			return '';
		}
		$bytes = $this->_dataSource->readBytes($byteCount);
		if ($characterSet == 'ASCII') {
			return $bytes;
		}
		return iconv('ASCII', $characterSet, $bytes);
	}



	/**** Internal Methods ****/


	/**
	 * Throws an exception unless $byteOrder matches one of the BYTE_ORDER_
	 * constants.
	 *
	 * Uses loose comparison so that values which were accepted historically
	 * (e.g. numeric strings) continue to be accepted.
	 *
	 * @param integer $byteOrder
	 * @throws Zend_Pdf_Exception
	 */
	private function _assertValidByteOrder($byteOrder)
	{
		if (($byteOrder == self::BYTE_ORDER_BIG_ENDIAN) ||
			($byteOrder == self::BYTE_ORDER_LITTLE_ENDIAN)) {
			return;
		}
		require_once 'Zend/Pdf/Exception.php';
		throw new Zend_Pdf_Exception("Invalid byte order: $byteOrder",
		Zend_Pdf_Exception::INVALID_BYTE_ORDER);
	}

	/**
	 * Reads $size bytes from the data source and returns them ordered with the
	 * most significant byte first.
	 *
	 * Little-endian data is simply reversed, which lets the integer readers
	 * share a single assembly loop. The caller must already have validated
	 * $byteOrder.
	 *
	 * @param integer $size Number of bytes to read.
	 * @param integer $byteOrder One of the BYTE_ORDER_ constants.
	 * @return string
	 * @throws Zend_Pdf_Exception
	 */
	private function _readBytesMsbFirst($size, $byteOrder)
	{
		$bytes = $this->_dataSource->readBytes($size);
		if ($byteOrder == self::BYTE_ORDER_LITTLE_ENDIAN) {
			$bytes = strrev($bytes);
		}
		return $bytes;
	}

}
